{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Updatable Neural Network Classifier\n",
    "This notebook demonstrates how to create a simple convolutional neural network classifier that can be trained on an MNIST-like dataset. The model is created using Keras and then converted to the Core ML format using keras_converter. Once in Core ML format, we mark the last two fully connected layers as updatable. Lastly, we attach a categorical cross-entropy loss layer to the last layer and use SGD as the optimizer."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_keras_base_model(url):\n",
    "    \"\"\"Build the untrained MNIST convolutional classifier in Keras and save it.\n",
    "\n",
    "    url - file path where the compiled Keras model is written (h5 file).\n",
    "    \"\"\"\n",
    "    import keras\n",
    "    from keras.layers import Conv2D, Dense, Dropout, Flatten, MaxPooling2D\n",
    "    from keras.models import Sequential\n",
    "\n",
    "    # Start from a fresh Keras session before any layer is created\n",
    "    # (presumably so auto-generated layer names such as 'dense_1' are reproducible).\n",
    "    keras.backend.clear_session()\n",
    "\n",
    "    model = Sequential()\n",
    "\n",
    "    # Layers in stacking order: two conv layers, pooling, then the dense head.\n",
    "    layer_stack = (\n",
    "        Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)),\n",
    "        Conv2D(64, (3, 3), activation='relu'),\n",
    "        MaxPooling2D(pool_size=(2, 2)),\n",
    "        Dropout(0.25),\n",
    "        Flatten(),\n",
    "        Dense(128, activation='relu'),\n",
    "        Dropout(0.5),\n",
    "        Dense(10, activation='softmax'),\n",
    "    )\n",
    "    for layer in layer_stack:\n",
    "        model.add(layer)\n",
    "\n",
    "    sgd = keras.optimizers.SGD(lr=0.01)\n",
    "    model.compile(loss=keras.losses.categorical_crossentropy,\n",
    "                  optimizer=sgd,\n",
    "                  metrics=['accuracy'])\n",
    "\n",
    "    # The model is saved without any training; only architecture and compile settings matter here.\n",
    "    model.save(url)\n",
    "\n",
    "keras_model_path = './KerasMnist.h5'\n",
    "create_keras_base_model(keras_model_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def convert_keras_to_mlmodel(keras_url, mlmodel_url):\n",
    "    \"\"\"Load a saved Keras model and write it back out as a Core ML classifier.\n",
    "\n",
    "    keras_url - path of the Keras h5 file to load.\n",
    "    mlmodel_url - path where the converted Core ML model is saved.\n",
    "    \"\"\"\n",
    "    from keras.models import load_model\n",
    "    from coremltools.converters import keras as keras_converter\n",
    "\n",
    "    loaded_model = load_model(keras_url)\n",
    "\n",
    "    # One class label per digit: '0' through '9'.\n",
    "    digit_labels = [str(digit) for digit in range(10)]\n",
    "\n",
    "    converted = keras_converter.convert(\n",
    "        loaded_model,\n",
    "        input_names=['image'],\n",
    "        output_names=['digitProbabilities'],\n",
    "        class_labels=digit_labels,\n",
    "        predicted_feature_name='digit')\n",
    "    converted.save(mlmodel_url)\n",
    "\n",
    "coreml_model_path = './MNISTDigitClassifier.mlmodel'\n",
    "convert_keras_to_mlmodel(keras_model_path, coreml_model_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[Id: 9], Name: dense_2__activation__ (Type: softmax)\n",
      "          Updatable: False\n",
      "          Input blobs: [u'dense_2_output']\n",
      "          Output blobs: [u'digitProbabilities']\n",
      "[Id: 8], Name: dense_2 (Type: innerProduct)\n",
      "          Updatable: False\n",
      "          Input blobs: [u'dense_1__activation___output']\n",
      "          Output blobs: [u'dense_2_output']\n",
      "[Id: 7], Name: dense_1__activation__ (Type: activation)\n",
      "          Updatable: False\n",
      "          Input blobs: [u'dense_1_output']\n",
      "          Output blobs: [u'dense_1__activation___output']\n"
     ]
    }
   ],
   "source": [
    "# Load the converted model's spec and list its final three layers\n",
    "import coremltools\n",
    "\n",
    "spec = coremltools.utils.load_spec(coreml_model_path)\n",
    "builder = coremltools.models.neural_network.NeuralNetworkBuilder(spec=spec)\n",
    "builder.inspect_layers(last=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[Id: 0] Name: image\n",
      "          Type: multiArrayType {\n",
      "  shape: 1\n",
      "  shape: 28\n",
      "  shape: 28\n",
      "  dataType: DOUBLE\n",
      "}\n",
      "\n",
      "[Id: 0] Name: image\n",
      "          Type: imageType {\n",
      "  width: 28\n",
      "  height: 28\n",
      "  colorSpace: GRAYSCALE\n",
      "}\n",
      "\n"
     ]
    }
   ],
   "source": [
    "from coremltools.proto import FeatureTypes_pb2 as _FeatureTypes_pb2\n",
    "\n",
    "# Inspect the model input first; this information is needed later by make_updatable.\n",
    "builder.inspect_input_features()\n",
    "\n",
    "neuralnetwork_spec = builder.spec\n",
    "model_description = neuralnetwork_spec.description\n",
    "\n",
    "# Change the input so the model accepts 28x28 grayscale images.\n",
    "# Assigning imageType fields replaces the multi-array input type printed above\n",
    "# with an image type (compare the two inspection outputs of this cell).\n",
    "model_description.input[0].type.imageType.width = 28\n",
    "model_description.input[0].type.imageType.height = 28\n",
    "\n",
    "grayscale = _FeatureTypes_pb2.ImageFeatureType.ColorSpace.Value('GRAYSCALE')\n",
    "model_description.input[0].type.imageType.colorSpace = grayscale\n",
    "\n",
    "# Inspect the input again to confirm the change of input type.\n",
    "builder.inspect_input_features()\n",
    "\n",
    "# Set input and output descriptions (output[0] is the probabilities, output[1] the predicted digit).\n",
    "model_description.input[0].shortDescription = 'Input image of the handwritten digit to classify'\n",
    "model_description.output[0].shortDescription = 'Probabilities / score for each possible digit'\n",
    "model_description.output[1].shortDescription = 'Predicted digit'\n",
    "\n",
    "# Provide model-level metadata.\n",
    "model_description.metadata.author = 'Core ML Tools'\n",
    "model_description.metadata.license = 'MIT'\n",
    "model_description.metadata.shortDescription = (\n",
    "        'An updatable hand-written digit classifier setup to train or be fine-tuned on MNIST like data.')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Now adding input digitProbabilities_true as target for categorical cross-entropy loss layer.\n"
     ]
    }
   ],
   "source": [
    "def make_updatable(builder, mlmodel_url, mlmodel_updatable_path):\n",
    "    \"\"\"Turn the non-updatable model held by `builder` into an updatable one and save it.\n",
    "\n",
    "    builder - NeuralNetworkBuilder wrapping the spec of the model to update; it is modified in place.\n",
    "    mlmodel_url - path of the original Core ML model. Unused here (the spec is taken from\n",
    "                  `builder`); kept so existing calls keep working.\n",
    "    mlmodel_updatable_path - the path the updatable Core ML model will be saved.\n",
    "    \"\"\"\n",
    "    from coremltools.models import MLModel\n",
    "    from coremltools.models.neural_network import SgdParams\n",
    "\n",
    "    model_spec = builder.spec\n",
    "\n",
    "    # make_updatable marks layers as updatable. It requires a list of layer names.\n",
    "    # dense_1 and dense_2 are the two innerProduct layers in this example, and we make both updatable.\n",
    "    builder.make_updatable(['dense_1', 'dense_2'])\n",
    "\n",
    "    # Categorical Cross Entropy or Mean Squared Error can be chosen for the loss layer.\n",
    "    # Categorical Cross Entropy is used in this example. CCE requires two arguments: 'name' and 'input'.\n",
    "    # name must be a string and will be the name associated with the loss layer.\n",
    "    # input must be the output of a softmax layer in the case of CCE.\n",
    "    # The loss's target will be provided automatically as a part of the model's training inputs.\n",
    "    builder.set_categorical_cross_entropy_loss(name='lossLayer', input='digitProbabilities')\n",
    "\n",
    "    # In addition to the loss layer, an optimizer must also be defined. SGD and Adam optimizers are supported.\n",
    "    # SGD is used in this example. To use SGD, one must set lr (learningRate) and batch (miniBatchSize);\n",
    "    # momentum is an optional parameter.\n",
    "    builder.set_sgd_optimizer(SgdParams(lr=0.01, batch=32))\n",
    "\n",
    "    # Finally, the number of epochs must be set.\n",
    "    builder.set_epochs(10)\n",
    "\n",
    "    # Describe the training inputs: [0] is the example image, [1] is its true label.\n",
    "    model_spec.description.trainingInput[0].shortDescription = 'Example image of handwritten digit'\n",
    "    model_spec.description.trainingInput[1].shortDescription = 'Associated true label (digit) of example image'\n",
    "\n",
    "    # Save the updated spec as a new model file.\n",
    "    mlmodel_updatable = MLModel(model_spec)\n",
    "    mlmodel_updatable.save(mlmodel_updatable_path)\n",
    "\n",
    "coreml_updatable_model_path = './UpdatableMNISTDigitClassifier.mlmodel'\n",
    "make_updatable(builder, coreml_model_path, coreml_updatable_model_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[Id: 0], Name: lossLayer (Type: categoricalCrossEntropyLossLayer)\n",
      "          Loss Input: digitProbabilities\n",
      "          Loss Target: digitProbabilities_true\n"
     ]
    }
   ],
   "source": [
    "# Reload the saved updatable model from disk and inspect its loss layer\n",
    "import coremltools\n",
    "\n",
    "spec = coremltools.utils.load_spec(coreml_updatable_model_path)\n",
    "builder = coremltools.models.neural_network.NeuralNetworkBuilder(spec=spec)\n",
    "builder.inspect_loss_layers()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Optimizer Type: sgdOptimizer\n",
      "lr: 0.01, min: 0.0, max: 1.0\n",
      "batch: 32, allowed_set: [32L]\n",
      "momentum: 0.0, min: 0.0, max: 1.0\n"
     ]
    }
   ],
   "source": [
    "# let's inspect the optimizer (type and hyperparameters) of the updatable Core ML model\n",
    "builder.inspect_optimizer()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Name: dense_2 (Type: innerProduct)\n",
      "          Input blobs: [u'dense_1__activation___output']\n",
      "          Output blobs: [u'dense_2_output']\n",
      "Name: dense_1 (Type: innerProduct)\n",
      "          Input blobs: [u'flatten_1_output']\n",
      "          Output blobs: [u'dense_1_output']\n"
     ]
    }
   ],
   "source": [
    "# let's see which layers are updatable\n",
    "builder.inspect_updatable_layers()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
